In [1]:
import numpy as np
import pandas as pd
import copy
import geopandas as gpd
import plotly.graph_objs as go
import plotly as py
from plotly.offline import init_notebook_mode, iplot
import matplotlib.pyplot as plt
In [2]:
states = pd.read_excel("./messy_data/states.xlsx")
states.head()
Out[2]:
State Abbreviation
0 Alabama AL
1 Alaska AK
2 Arizona AZ
3 Arkansas AR
4 California CA
In [3]:
states.drop(states.index[states.Abbreviation == "DC"], inplace=True)
states.columns = ["Jurisdiction", "Jurisdiction Abbreviation"]
states = states.append({"Jurisdiction" : "Federal", "Jurisdiction Abbreviation" : "FED"}, ignore_index=True)
states.tail()
Out[3]:
Jurisdiction Jurisdiction Abbreviation
47 West Virginia WV
48 Wisconsin WI
49 Wyoming WY
50 Puerto Rico PR
51 Federal FED
In [4]:
malePrisonerPopulation = pd.read_excel("./messy_data/Prisoners under the jurisdiction of state or federal correctional authorities 1978-2016.xlsx", sheet_name="Male", header=9, nrows=54).dropna(1, "all").dropna(0)
malePrisonerPopulation.head()
Out[4]:
Jurisdiction 1978 1979 1980 1981 1982 1983 1984 1985 1986 ... 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016
1 Federal 27975.0 24835.0 22964.0 26574.0 28046.0 30172.0 32267.0 37819.0 41575.0 ... 186280 188007 194493 196222 202462 203766 201697 196568 183502 176495
3 Alabama 5368.0 5221.0 6278.0 7339.0 8757.0 9375.0 9961.0 10453.0 11094.0 ... 27254 28277 29419 29261 29696 29782 29660 29182 28220 26506
4 Alaska 678.0 729.0 801.0 977.0 1276.0 1557.0 1888.0 2207.0 2344.0 ... 4603 4511 4696 4782 4932 4934 4450 5091 4761 4024
5 Arizona 3275.0 3573.0 4153.0 4986.0 5788.0 6566.0 7482.0 8134.0 8948.0 ... 34286 35739 36768 36521 36470 36447 37402 38295 38738 38323
6 Arkansas 2553.0 2927.0 2805.0 3197.0 3755.0 4050.0 4286.0 4394.0 4477.0 ... 13248 13656 14147 15040 14995 13594 15904 16476 16305 16161

5 rows × 40 columns

In [5]:
femalePrisonerPopulation = pd.read_excel("./messy_data/Prisoners under the jurisdiction of state or federal correctional authorities 1978-2016.xlsx", sheet_name="Female", header=9, nrows=54).dropna(1, "all").dropna(0)
femalePrisonerPopulation.head()
Out[5]:
Jurisdiction 1978 1979 1980 1981 1982 1983 1984 1985 1986 ... 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016
1 Federal 1828.0 1536.0 1399.0 1559.0 1627.0 1754.0 1996.0 2404.0 2833.0 ... 13338 13273 13625 13549 13900 14049 14169 13999 12953 12697
3 Alabama 257.0 243.0 265.0 318.0 476.0 481.0 521.0 562.0 616.0 ... 2158 2231 2455 2503 2574 2649 2721 2589 2590 2377
4 Alaska 34.0 31.0 21.0 47.0 46.0 74.0 79.0 122.0 116.0 ... 564 503 589 609 665 699 631 703 577 410
5 Arizona 181.0 176.0 219.0 237.0 281.0 323.0 363.0 397.0 486.0 ... 3460 3763 3776 3688 3550 3633 3775 3964 3981 3997
6 Arkansas 101.0 115.0 106.0 131.0 167.0 196.0 196.0 217.0 224.0 ... 1066 1060 1061 1164 1113 1060 1331 1398 1402 1376

5 rows × 40 columns

In [6]:
totalPrisonerPopulation = pd.read_excel("./messy_data/Prisoners under the jurisdiction of state or federal correctional authorities 1978-2016.xlsx", sheet_name="Total", header=9, nrows=54).dropna(1, "all").dropna(0)
totalPrisonerPopulation.head()
Out[6]:
Jurisdiction 1978 1979 1980 1981 1982 1983 1984 1985 1986 ... 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016
1 Federal 29803.0 26371.0 24363.0 28133.0 29673.0 31926.0 34263.0 40223.0 44408.0 ... 199618 201280 208118 209771 216362 217815 215866 210567 196455 189192
3 Alabama 5625.0 5464.0 6543.0 7657.0 9233.0 9856.0 10482.0 11015.0 11710.0 ... 29412 30508 31874 31764 32270 32431 32381 31771 30810 28883
4 Alaska 712.0 760.0 822.0 1024.0 1322.0 1631.0 1967.0 2329.0 2460.0 ... 5167 5014 5285 5391 5597 5633 5081 5794 5338 4434
5 Arizona 3456.0 3749.0 4372.0 5223.0 6069.0 6889.0 7845.0 8531.0 9434.0 ... 37746 39502 40544 40209 40020 40080 41177 42259 42719 42320
6 Arkansas 2654.0 3042.0 2911.0 3328.0 3922.0 4246.0 4482.0 4611.0 4701.0 ... 14314 14716 15208 16204 16108 14654 17235 17874 17707 17537

5 rows × 40 columns

In [7]:
def cleanAndMeltPopulationTable(table, states):
    
    # add DC data to Federal, and delete it
    table.loc[table.Jurisdiction == "Federal"].iloc[:, 1:] = table.loc[table.Jurisdiction == "Federal"].iloc[:, 1:] + table.loc[table.Jurisdiction == "District of Columbia"].iloc[:, 1:].replace("--", 0)
    table.drop(table.index[table.Jurisdiction == "District of Columbia"], inplace=True)

    table = table.melt(id_vars = "Jurisdiction", var_name = "Year", value_name = "Population")
    table.Year = table.Year.astype(int)
    
    table = states.merge(table, on="Jurisdiction")
    
    return table
In [8]:
meltedMalePrisonerPopulation = cleanAndMeltPopulationTable(malePrisonerPopulation, states)
meltedFemalePrisonerPopulation = cleanAndMeltPopulationTable(femalePrisonerPopulation, states)
meltedTotalPrisonerPopulation = cleanAndMeltPopulationTable(totalPrisonerPopulation, states)

meltedTotalPrisonerPopulation.head()
/anaconda3/envs/SpringBoard/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

Out[8]:
Jurisdiction Jurisdiction Abbreviation Year Population
0 Alabama AL 1978 5625
1 Alabama AL 1979 5464
2 Alabama AL 1980 6543
3 Alabama AL 1981 7657
4 Alabama AL 1982 9233
In [9]:
regions = pd.read_excel("./messy_data/state_region.xlsx")
regions.head()
Out[9]:
State Division Region
0 Connecticut New England Northeast
1 Maine New England Northeast
2 Massachusetts New England Northeast
3 New Hampshire New England Northeast
4 Rhode Island New England Northeast
In [10]:
regions.drop(regions.index[regions.State == "District of Columbia"], inplace=True)
In [11]:
def aggregateRegionalPopulationSum(table, regions):
    
    table = regions.merge(table, left_on="State", right_on="Jurisdiction", how="right")
    temp_index = table["Jurisdiction"] == "Federal"
    table.loc[temp_index, "Region"] = "Federal"
    table.loc[temp_index, "Division"] = "Federal"
    
    regionSum = table.groupby(["Year", "Region"]).Population.sum()
    divisionSum = table.groupby(["Year", "Region", "Division"]).Population.sum()
    
    return regionSum, divisionSum

    
In [12]:
malePrisonerPopulationRegionSum, malePrisonerPopulationDivisionSum = aggregateRegionalPopulationSum(meltedMalePrisonerPopulation, regions)
femalePrisonerPopulationRegionSum, femalePrisonerPopulationDivisionSum = aggregateRegionalPopulationSum(meltedFemalePrisonerPopulation, regions)
totalPrisonerPopulationRegionSum, totalPrisonerPopulationDivisionSum = aggregateRegionalPopulationSum(meltedTotalPrisonerPopulation, regions)
In [13]:
def plotAnnualStatePopulation(table, gender):
    
    table = table[table.Jurisdiction != "Federal"]

    cmax = table.Population.max()
    cmin = table.Population.min()
    
    years = table.Year.unique()

    heatMapData = [{"type" : "choropleth",  "locations" : table.loc[table.Year == year, "Jurisdiction Abbreviation"], "locationmode" : "USA-states", "colorscale" : "Viridis", "zmin" : cmin, "zmax" : cmax, "z" : table.Population[table.Year == year].astype(float)} for year in years]
    mapLayout = [{"geo" : {"scope" : 'usa'}, "title" : gender + " Prisoner Population by State, " + str(year)} for year in years]
    
    for i, data in enumerate(heatMapData):
        usHeatMap = go.Figure(data=[data], layout=mapLayout[i])
        iplot(usHeatMap)
In [14]:
# gender = "Total"

# cmax = table.Population.max()
# cmin = table.Population.min()

# heatMapData = [dict(type = "choropleth", locations=table.loc[table.Year == year, "Jurisdiction Abbreviation"], locationmode="USA-states", colorscale="Viridis", zmin=cmin, zmax=cmax, z=table.Population[table.Year == year].astype(float), text = str(year), geo = 'geo'+str(i+1) if i != 0 else 'geo') for i, year in enumerate(years)]
# mapLayout = {"geo" + (str(i + 1) if i != 0 else "" ) : {"scope" : 'usa', "domain" : dict(x=[0, 1], y=[i / len(years), (i + 1) / len(years)])} for i in range(len(years))}
# fig = {'data' : heatMapData, 'layout' : mapLayout}

# iplot(fig)
In [15]:
init_notebook_mode()
In [16]:
plotAnnualStatePopulation(meltedMalePrisonerPopulation, "Male")
In [17]:
plotAnnualStatePopulation(meltedFemalePrisonerPopulation, "Female")
In [18]:
plotAnnualStatePopulation(meltedTotalPrisonerPopulation, "Total")
In [19]:
usMap = gpd.read_file("./us_states_map.json")
usMap
Out[19]:
GEO_ID STATE NAME LSAD CENSUSAREA geometry
0 0400000US04 04 Arizona 113594.084 POLYGON ((-112.538593 37.000674, -112.534545 3...
1 0400000US05 05 Arkansas 52035.477 POLYGON ((-94.042964 33.019219, -94.043036 33....
2 0400000US06 06 California 155779.220 (POLYGON ((-120.248484 33.999329, -120.247393 ...
3 0400000US08 08 Colorado 103641.888 POLYGON ((-107.317794 41.002957, -107.000606 4...
4 0400000US09 09 Connecticut 4842.355 POLYGON ((-72.397428 42.033302, -72.1988280000...
5 0400000US11 11 District of Columbia 61.048 POLYGON ((-77.03298599999999 38.8395, -77.0316...
6 0400000US13 13 Georgia 57513.485 POLYGON ((-84.81047700000001 34.987607, -84.80...
7 0400000US15 15 Hawaii 6422.628 (POLYGON ((-155.778234 20.245743, -155.772734 ...
8 0400000US17 17 Illinois 55518.930 POLYGON ((-89.36603100000001 42.500274, -89.36...
9 0400000US18 18 Indiana 35826.109 POLYGON ((-84.804119 40.352757, -84.803917 40....
10 0400000US22 22 Louisiana 43203.905 (POLYGON ((-88.865067 29.752714, -88.889754999...
11 0400000US27 27 Minnesota 79626.743 POLYGON ((-92.191501 46.672586, -92.1971450000...
12 0400000US28 28 Mississippi 46923.274 (POLYGON ((-89.095623 30.231767, -89.077259 30...
13 0400000US30 30 Montana 145545.801 POLYGON ((-111.044275 45.001345, -111.056207 4...
14 0400000US35 35 New Mexico 121298.148 POLYGON ((-105.998003 32.002328, -106.09976 32...
15 0400000US38 38 North Dakota 69000.798 POLYGON ((-100.511949 45.943654, -100.627681 4...
16 0400000US40 40 Oklahoma 68594.921 POLYGON ((-100.000381 34.746358, -100.000381 3...
17 0400000US42 42 Pennsylvania 44742.703 POLYGON ((-79.476662 39.721078, -79.608223 39....
18 0400000US47 47 Tennessee 41234.896 POLYGON ((-83.47210800000001 36.597284, -83.27...
19 0400000US51 51 Virginia 39490.086 (POLYGON ((-75.242266 38.027209, -75.296871 37...
20 0400000US72 72 Puerto Rico 3423.775 (POLYGON ((-65.280764 18.288274, -65.283269 18...
21 0400000US10 10 Delaware 1948.543 (POLYGON ((-75.564927 39.583248, -75.576271000...
22 0400000US54 54 West Virginia 24038.210 POLYGON ((-78.571901 39.031995, -78.565837 39....
23 0400000US55 55 Wisconsin 54157.805 (POLYGON ((-90.456677 47.016737, -90.455299999...
24 0400000US56 56 Wyoming 97093.141 POLYGON ((-104.055077 43.936535, -104.055104 4...
25 0400000US01 01 Alabama 50645.326 (POLYGON ((-88.124658 30.28364, -88.0868119999...
26 0400000US02 02 Alaska 570640.950 (POLYGON ((-162.255031 54.978353, -162.249682 ...
27 0400000US12 12 Florida 53624.759 (POLYGON ((-80.250581 25.34193, -80.2549159999...
28 0400000US16 16 Idaho 82643.117 POLYGON ((-111.048974 44.474072, -111.049194 4...
29 0400000US20 20 Kansas 81758.717 POLYGON ((-99.541116 36.999573, -99.5580680000...
30 0400000US24 24 Maryland 9707.241 (POLYGON ((-76.048373 38.12055, -76.056811 38....
31 0400000US34 34 New Jersey 7354.220 POLYGON ((-74.90023600000001 40.077149, -74.83...
32 0400000US37 37 North Carolina 48617.905 (POLYGON ((-75.753765 35.199612, -75.74522 35....
33 0400000US45 45 South Carolina 30060.696 POLYGON ((-82.216217 35.196044, -82.195483 35....
34 0400000US53 53 Washington 66455.521 (POLYGON ((-122.397349 47.912401, -122.419274 ...
35 0400000US50 50 Vermont 9216.657 POLYGON ((-72.458519 42.726853, -72.86418 42.7...
36 0400000US49 49 Utah 82169.620 POLYGON ((-111.046689 42.001567, -111.046402 4...
37 0400000US19 19 Iowa 55857.130 POLYGON ((-91.16306400000001 42.986781, -91.14...
38 0400000US21 21 Kentucky 39486.338 (POLYGON ((-89.5391 36.498201, -89.560344 36.5...
39 0400000US23 23 Maine 30842.923 (POLYGON ((-69.307908 43.773767, -69.306751000...
40 0400000US25 25 Massachusetts 7800.058 (POLYGON ((-70.821001 41.587268, -70.821743 41...
41 0400000US26 26 Michigan 56538.901 (POLYGON ((-85.566441 45.760222, -85.54956 45....
42 0400000US29 29 Missouri 68741.522 POLYGON ((-89.545006 36.336809, -89.560439 36....
43 0400000US31 31 Nebraska 76824.171 POLYGON ((-104.05283 41.697954, -104.052774 41...
44 0400000US32 32 Nevada 109781.180 POLYGON ((-114.046555 40.116931, -114.047134 3...
45 0400000US33 33 New Hampshire 8952.651 POLYGON ((-70.81954899999999 43.123231, -70.78...
46 0400000US36 36 New York 47126.399 (POLYGON ((-73.77336099999999 40.859449, -73.7...
47 0400000US39 39 Ohio 40860.694 (POLYGON ((-82.700208 41.61219, -82.691123 41....
48 0400000US41 41 Oregon 95988.013 POLYGON ((-121.908267 45.654399, -121.900858 4...
49 0400000US44 44 Rhode Island 1033.814 (POLYGON ((-71.38358599999999 41.464782, -71.3...
50 0400000US46 46 South Dakota 75811.000 POLYGON ((-104.055104 43.853478, -104.055077 4...
51 0400000US48 48 Texas 261231.711 (POLYGON ((-96.83002999999999 28.111842, -96.8...
In [20]:
usMap = usMap.loc[list(map(lambda x: x not in ["Puerto Rico", "District of Columbia"], usMap.NAME))]
usMap
Out[20]:
GEO_ID STATE NAME LSAD CENSUSAREA geometry
0 0400000US04 04 Arizona 113594.084 POLYGON ((-112.538593 37.000674, -112.534545 3...
1 0400000US05 05 Arkansas 52035.477 POLYGON ((-94.042964 33.019219, -94.043036 33....
2 0400000US06 06 California 155779.220 (POLYGON ((-120.248484 33.999329, -120.247393 ...
3 0400000US08 08 Colorado 103641.888 POLYGON ((-107.317794 41.002957, -107.000606 4...
4 0400000US09 09 Connecticut 4842.355 POLYGON ((-72.397428 42.033302, -72.1988280000...
6 0400000US13 13 Georgia 57513.485 POLYGON ((-84.81047700000001 34.987607, -84.80...
7 0400000US15 15 Hawaii 6422.628 (POLYGON ((-155.778234 20.245743, -155.772734 ...
8 0400000US17 17 Illinois 55518.930 POLYGON ((-89.36603100000001 42.500274, -89.36...
9 0400000US18 18 Indiana 35826.109 POLYGON ((-84.804119 40.352757, -84.803917 40....
10 0400000US22 22 Louisiana 43203.905 (POLYGON ((-88.865067 29.752714, -88.889754999...
11 0400000US27 27 Minnesota 79626.743 POLYGON ((-92.191501 46.672586, -92.1971450000...
12 0400000US28 28 Mississippi 46923.274 (POLYGON ((-89.095623 30.231767, -89.077259 30...
13 0400000US30 30 Montana 145545.801 POLYGON ((-111.044275 45.001345, -111.056207 4...
14 0400000US35 35 New Mexico 121298.148 POLYGON ((-105.998003 32.002328, -106.09976 32...
15 0400000US38 38 North Dakota 69000.798 POLYGON ((-100.511949 45.943654, -100.627681 4...
16 0400000US40 40 Oklahoma 68594.921 POLYGON ((-100.000381 34.746358, -100.000381 3...
17 0400000US42 42 Pennsylvania 44742.703 POLYGON ((-79.476662 39.721078, -79.608223 39....
18 0400000US47 47 Tennessee 41234.896 POLYGON ((-83.47210800000001 36.597284, -83.27...
19 0400000US51 51 Virginia 39490.086 (POLYGON ((-75.242266 38.027209, -75.296871 37...
21 0400000US10 10 Delaware 1948.543 (POLYGON ((-75.564927 39.583248, -75.576271000...
22 0400000US54 54 West Virginia 24038.210 POLYGON ((-78.571901 39.031995, -78.565837 39....
23 0400000US55 55 Wisconsin 54157.805 (POLYGON ((-90.456677 47.016737, -90.455299999...
24 0400000US56 56 Wyoming 97093.141 POLYGON ((-104.055077 43.936535, -104.055104 4...
25 0400000US01 01 Alabama 50645.326 (POLYGON ((-88.124658 30.28364, -88.0868119999...
26 0400000US02 02 Alaska 570640.950 (POLYGON ((-162.255031 54.978353, -162.249682 ...
27 0400000US12 12 Florida 53624.759 (POLYGON ((-80.250581 25.34193, -80.2549159999...
28 0400000US16 16 Idaho 82643.117 POLYGON ((-111.048974 44.474072, -111.049194 4...
29 0400000US20 20 Kansas 81758.717 POLYGON ((-99.541116 36.999573, -99.5580680000...
30 0400000US24 24 Maryland 9707.241 (POLYGON ((-76.048373 38.12055, -76.056811 38....
31 0400000US34 34 New Jersey 7354.220 POLYGON ((-74.90023600000001 40.077149, -74.83...
32 0400000US37 37 North Carolina 48617.905 (POLYGON ((-75.753765 35.199612, -75.74522 35....
33 0400000US45 45 South Carolina 30060.696 POLYGON ((-82.216217 35.196044, -82.195483 35....
34 0400000US53 53 Washington 66455.521 (POLYGON ((-122.397349 47.912401, -122.419274 ...
35 0400000US50 50 Vermont 9216.657 POLYGON ((-72.458519 42.726853, -72.86418 42.7...
36 0400000US49 49 Utah 82169.620 POLYGON ((-111.046689 42.001567, -111.046402 4...
37 0400000US19 19 Iowa 55857.130 POLYGON ((-91.16306400000001 42.986781, -91.14...
38 0400000US21 21 Kentucky 39486.338 (POLYGON ((-89.5391 36.498201, -89.560344 36.5...
39 0400000US23 23 Maine 30842.923 (POLYGON ((-69.307908 43.773767, -69.306751000...
40 0400000US25 25 Massachusetts 7800.058 (POLYGON ((-70.821001 41.587268, -70.821743 41...
41 0400000US26 26 Michigan 56538.901 (POLYGON ((-85.566441 45.760222, -85.54956 45....
42 0400000US29 29 Missouri 68741.522 POLYGON ((-89.545006 36.336809, -89.560439 36....
43 0400000US31 31 Nebraska 76824.171 POLYGON ((-104.05283 41.697954, -104.052774 41...
44 0400000US32 32 Nevada 109781.180 POLYGON ((-114.046555 40.116931, -114.047134 3...
45 0400000US33 33 New Hampshire 8952.651 POLYGON ((-70.81954899999999 43.123231, -70.78...
46 0400000US36 36 New York 47126.399 (POLYGON ((-73.77336099999999 40.859449, -73.7...
47 0400000US39 39 Ohio 40860.694 (POLYGON ((-82.700208 41.61219, -82.691123 41....
48 0400000US41 41 Oregon 95988.013 POLYGON ((-121.908267 45.654399, -121.900858 4...
49 0400000US44 44 Rhode Island 1033.814 (POLYGON ((-71.38358599999999 41.464782, -71.3...
50 0400000US46 46 South Dakota 75811.000 POLYGON ((-104.055104 43.853478, -104.055077 4...
51 0400000US48 48 Texas 261231.711 (POLYGON ((-96.83002999999999 28.111842, -96.8...
In [21]:
def plotGeoPandasUSMap(usMap, title=None, dataColumn=None, dataLimit=(None, None)):
    
    fig, mapAx = plt.subplots(1, 1)
    
    mapLimitW, mapLimitE, mapLimitS, mapLimitN = -185, -65, 15, 75
    mapAx.axis((mapLimitW, mapLimitE, mapLimitS, mapLimitN))
    mapAx.axis('off')
    mapAx.set_aspect('equal', 'box')
    mapAx.set_title(title)
    
    usMap.plot(column=dataColumn, figsize=(20, 20), edgecolor="k", ax=mapAx, legend=True, vmin=dataLimit[0], vmax=dataLimit[1])
    
#     sm = plt.cm.ScalarMappable(cmap='viridis_r', norm=plt.Normalize(vmin=dataLimit[0], vmax=dataLimit[1]))
#     sm._A = []
#     cbar = mapAx.colorbar(sm)
In [22]:
usMap = usMap.merge(states, left_on="NAME", right_on = "Jurisdiction", how="left").drop("Jurisdiction", axis=1)
usMap.columns = ["GEO_ID", "STATE", "NAME", "LSAD", "CENSUSAREA", "geometry", "Abbreviatio"]
usMap.head()
Out[22]:
GEO_ID STATE NAME LSAD CENSUSAREA geometry Abbreviatio
0 0400000US04 04 Arizona 113594.084 POLYGON ((-112.538593 37.000674, -112.534545 3... AZ
1 0400000US05 05 Arkansas 52035.477 POLYGON ((-94.042964 33.019219, -94.043036 33.... AR
2 0400000US06 06 California 155779.220 (POLYGON ((-120.248484 33.999329, -120.247393 ... CA
3 0400000US08 08 Colorado 103641.888 POLYGON ((-107.317794 41.002957, -107.000606 4... CO
4 0400000US09 09 Connecticut 4842.355 POLYGON ((-72.397428 42.033302, -72.1988280000... CT
In [23]:
usMap = usMap.merge(regions, left_on="NAME", right_on = "State", how="left").drop("State", axis=1)
usMap.head()
Out[23]:
GEO_ID STATE NAME LSAD CENSUSAREA geometry Abbreviatio Division Region
0 0400000US04 04 Arizona 113594.084 POLYGON ((-112.538593 37.000674, -112.534545 3... AZ Mountain West
1 0400000US05 05 Arkansas 52035.477 POLYGON ((-94.042964 33.019219, -94.043036 33.... AR West South Central South
2 0400000US06 06 California 155779.220 (POLYGON ((-120.248484 33.999329, -120.247393 ... CA Pacific West
3 0400000US08 08 Colorado 103641.888 POLYGON ((-107.317794 41.002957, -107.000606 4... CO Mountain West
4 0400000US09 09 Connecticut 4842.355 POLYGON ((-72.397428 42.033302, -72.1988280000... CT New England Northeast
In [24]:
def plotRegionalPopulation(table, usMap, gender):
    
    regionType = table.index.names[1]

    tableWOFederal = table[table.index.get_level_values(1) != "Federal"]

    years = table.index.get_level_values(0).unique()
    dividedTableWOFederal = [tableWOFederal[year] for year in years]

    usRegionMap = usMap[[regionType, "geometry"]].dissolve(regionType)

    colorlimit = (tableWOFederal.min(), tableWOFederal.max())
    for i, oneYearTable in enumerate(dividedTableWOFederal):
        year = years[i]

        oneYearTable = usRegionMap.join(oneYearTable)

        figtitle = gender + " Prisoner Population by " + regionType + ", " + str(year)
        plotGeoPandasUSMap(oneYearTable, figtitle, "Population", colorlimit)


    figtitle = gender + " Prisoner Population by " + regionType
    ax = table.unstack().plot(title = figtitle)
    ax.set_ylim(bottom=0)
    ax.tick_params(axis = "y", length=0)
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), fancybox=True, shadow=True, ncol=len(table.index.get_level_values(1).unique()))
In [25]:
plotRegionalPopulation(malePrisonerPopulationRegionSum, usMap, "Male")
/anaconda3/envs/SpringBoard/lib/python3.7/site-packages/ipykernel_launcher.py:3: RuntimeWarning:

More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).

/anaconda3/envs/SpringBoard/lib/python3.7/site-packages/pandas/plotting/_core.py:304: RuntimeWarning:

More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).

In [26]:
plotRegionalPopulation(femalePrisonerPopulationRegionSum, usMap, "Female")
/anaconda3/envs/SpringBoard/lib/python3.7/site-packages/ipykernel_launcher.py:3: RuntimeWarning:

More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).

In [27]:
plotRegionalPopulation(totalPrisonerPopulationRegionSum, usMap, "Total")
/anaconda3/envs/SpringBoard/lib/python3.7/site-packages/ipykernel_launcher.py:3: RuntimeWarning:

More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).

In [28]:
def dropRegionIndex(table):
    table = copy.deepcopy(table)
    
    table.index = table.index.droplevel(1)
    return table


plotRegionalPopulation(dropRegionIndex(malePrisonerPopulationDivisionSum), usMap, "Male")
/anaconda3/envs/SpringBoard/lib/python3.7/site-packages/ipykernel_launcher.py:3: RuntimeWarning:

More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).

In [29]:
plotRegionalPopulation(dropRegionIndex(femalePrisonerPopulationDivisionSum), usMap, "Female")
/anaconda3/envs/SpringBoard/lib/python3.7/site-packages/ipykernel_launcher.py:3: RuntimeWarning:

More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).

In [30]:
plotRegionalPopulation(dropRegionIndex(totalPrisonerPopulationDivisionSum), usMap, "Total")
/anaconda3/envs/SpringBoard/lib/python3.7/site-packages/ipykernel_launcher.py:3: RuntimeWarning:

More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).

In [31]:
custodyPopulation_11_16 = pd.read_excel("./messy_data/Prison occupation rate.xlsx", "Custody Population", na_values="/")
occupancyLowRate_11_16 = pd.read_excel("./messy_data/Prison occupation rate.xlsx", "Highest Capacity", na_values="/")
occupancyHighRate_11_16 = pd.read_excel("./messy_data/Prison occupation rate.xlsx", "Lowest Capacity", na_values="/")
In [32]:
custodyPopulation_11_16
Out[32]:
Jurisdiction 2011 2012 2013 2014 2015 2016
0 Federal 176228 176658 174242.0 169840 160946 154339
1 Alabama 26268 26230 26271.0 25664 24814 23397
2 Alaska 3708 4575 5054.0 5188 5247 4378
3 Arizona 33492 33578 34626.0 35181 35733 42248
4 Arkansas 14090 14043 14295.0 15250 15784 15833
5 California 138274 123090 122798.0 119071 116569 117557
6 Colorado 17559 16389 16286.0 16687 15972 15922
7 Connecticut 17022 16347 16594.0 16167 15500 14532
8 Delaware 6546 6730 6798.0 6730 6437 6334
9 Florida 100861 99835 100940.0 100873 99485 85834
10 Georgia 52844 55178 53701.0 52719 52002 53433
11 Hawaii 3687 3661 3752.0 3965 4073 3777
12 Idaho 7470 7715 7219.0 7497 7238 7221
13 Illinois 48427 49348 48653.0 48278 46240 43616
14 Indiana 24450 23783 28495.0 28073 26586 25143
15 Iowa 9115 8735 8106.0 8209 8230 8378
16 Kansas 9256 9422 9515.0 9539 9533 9653
17 Kentucky 11951 12186 12141.0 12114 11959 11867
18 Louisiana 18844 18601 18794.0 18710 18447 17932
19 Maine 1978 1977 2073.0 2199 2190 2356
20 Maryland 22923 21783 21676.0 21236 20921 20211
21 Massachusetts 11467 11127 10622.0 10447 9493 9038
22 Michigan 42904 43594 43704.0 43359 42628 41122
23 Minnesota 9309 9421 9391.0 9576 9578 9509
24 Mississippi 15390 15791 15591.0 13069 13967 13720
25 Missouri 30969 31205 31499.0 31903 32295 32427
26 Montana 1707 1677 1666.0 1687 1686 1718
27 Nebraska 4657 4721 5012.0 5228 5133 5167
28 Nevada 12159 12594 NaN 12693 13235 13932
29 New Hampshire 2423 2568 2848.0 2723 2661 2599
30 New Jersey 20755 20333 19528.0 18633 17431 16738
31 New Mexico 3834 3641 3783.0 3876 4078 3956
32 New York 55196 54058 53312.0 52362 51485 50611
33 North Carolina 39662 37378 37176.0 37348 36888 35970
34 North Dakota 1385 1413 1571.0 1325 1345 1379
35 Ohio 47957 45529 46224.0 46151 46190 45913
36 Oklahoma 17724 17947 18313.0 19126 19875 19218
37 Oregon 13728 14123 14605.0 14492 14655 14579
38 Pennsylvania 48515 49009 49735.0 48538 48241 48287
39 Rhode Island 3032 3042 3168.0 3133 2982 2887
40 South Carolina 22343 21867 21534.0 20948 20457 20376
41 South Dakota 3551 3582 3596.0 3497 3514 3770
42 Tennessee 14684 14652 15655.0 15699 14628 14106
43 Texas 141353 136578 140839.0 139879 138199 137584
44 Utah 5294 5309 5382.0 5307 4831 4502
45 Vermont 1531 1530 1579.0 1548 1509 1471
46 Virginia 28962 28149 28431.0 28480 30430 29882
47 Washington 17109 16919 17760.0 17180 17222 17228
48 West Virginia 5149 5335 5708.0 5867 5925 5899
49 Wisconsin 22381 22401 22443.0 22572 22914 23163
50 Wyoming 1917 1951 2036.0 2114 2133 2083
In [33]:
def cleanAndMeltCustodyOccupancyTable(table, states, tableType):
    
    table = table.melt(id_vars = "Jurisdiction", var_name = "Year", value_name = tableType)
    if tableType == "Occupancy":
        table.Occupancy = table.Occupancy / 100
    
    table.Year = table.Year.astype(int)
    table = states.merge(table, on="Jurisdiction")
    
    return table
In [34]:
meltedCustodyPopulation_11_16 = cleanAndMeltCustodyOccupancyTable(custodyPopulation_11_16, states, "Population")
meltedOccupancyLowRate_11_16 = cleanAndMeltCustodyOccupancyTable(occupancyLowRate_11_16, states, "Occupancy")
meltedOccupancyHighRate_11_16 = cleanAndMeltCustodyOccupancyTable(occupancyHighRate_11_16, states, "Occupancy")
In [35]:
meltedCustodyPopulation_11_16.head()
Out[35]:
Jurisdiction Jurisdiction Abbreviation Year Population
0 Alabama AL 2011 26268.0
1 Alabama AL 2012 26230.0
2 Alabama AL 2013 26271.0
3 Alabama AL 2014 25664.0
4 Alabama AL 2015 24814.0
In [36]:
meltedOccupancyHighRate_11_16.head()
Out[36]:
Jurisdiction Jurisdiction Abbreviation Year Occupancy
0 Alabama AL 2011 1.960000
1 Alabama AL 2012 1.957000
2 Alabama AL 2013 1.972593
3 Alabama AL 2014 1.927016
4 Alabama AL 2015 1.863000
In [37]:
def plotAnnualStateOccupancy(table, low_or_high):
    
    table = table[table.Occupancy != "Federal"]

    cmax = table.Occupancy.max()
    cmin = table.Occupancy.min()
    
    years = table.Year.unique()

    heatMapData = [{"type" : "choropleth",  "locations" : table.loc[table.Year == year, "Jurisdiction Abbreviation"], "locationmode" : "USA-states", "colorscale" : "Viridis", "zmin" : cmin, "zmax" : cmax, "z" : table.Occupancy[table.Year == year].astype(float)} for year in years]
    mapLayout = [{"geo" : {"scope" : 'usa'}, "title" : "Prison Occupancy (" + low_or_high + " Estimation) by State, " + str(year)} for year in years]
    
    for i, data in enumerate(heatMapData):
        usHeatMap = go.Figure(data=[data], layout=mapLayout[i])
        iplot(usHeatMap)
In [38]:
plotAnnualStateOccupancy(meltedOccupancyLowRate_11_16, "Low")
/anaconda3/envs/SpringBoard/lib/python3.7/site-packages/pandas/core/ops.py:1649: FutureWarning:

elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison

In [39]:
plotAnnualStateOccupancy(meltedOccupancyHighRate_11_16, "High")
In [40]:
def aggregateRegionalOccupancy(table, populationTable, regions):
    
    table = regions.merge(table.dropna(), left_on="State", right_on="Jurisdiction", how="right")
    table = table.merge(populationTable[["Jurisdiction", "Year", "Population"]].dropna(), on=["Jurisdiction", "Year"], how="left")

    temp_index = table["Jurisdiction"] == "Federal"
    table.loc[temp_index, "Region"] = "Federal"
    table.loc[temp_index, "Division"] = "Federal"
    table["Capacity"] = table.Population / table.Occupancy
    
    regionCapacity = table.groupby(["Year", "Region"]).Capacity.sum()
    divisionCapacity = table.groupby(["Year", "Region", "Division"]).Capacity.sum()
    
    regionPopulation = table.groupby(["Year", "Region"]).Population.sum()
    divisionPopulation = table.groupby(["Year", "Region", "Division"]).Population.sum()
    
    regionOccupancy = (regionPopulation / regionCapacity).rename("Occupancy")
    divisionOccupancy = (divisionPopulation / divisionCapacity).rename("Occupancy")
    
    return regionOccupancy, divisionOccupancy
In [41]:
regionLowOccupancy, divisionLowOccupancy = aggregateRegionalOccupancy(meltedOccupancyLowRate_11_16, meltedCustodyPopulation_11_16, regions)
regionHighOccupancy, divisionHighOccupancy = aggregateRegionalOccupancy(meltedOccupancyHighRate_11_16, meltedCustodyPopulation_11_16, regions)
In [42]:
regionLowOccupancy
Out[42]:
Year  Region   
2011  Federal      1.380000
      Midwest      1.077601
      Northeast    1.016331
      South        0.889082
      West         1.210281
2012  Federal      1.373000
      Midwest      1.034894
      Northeast    0.997858
      South        0.877879
      West         1.132735
2013  Federal      1.331037
      Midwest      1.115274
      Northeast    0.999224
      South        0.888064
      West         1.132297
2014  Federal      1.280000
      Midwest      1.109416
      Northeast    0.985143
      South        0.889829
      West         0.921820
2015  Federal      1.197000
      Midwest      1.095396
      Northeast    0.961326
      South        0.899980
      West         0.907239
2016  Federal      1.140430
      Midwest      1.043553
      Northeast    0.918713
      South        0.910598
      West         0.937919
Name: Occupancy, dtype: float64
In [43]:
divisionLowOccupancy
Out[43]:
Year  Region     Division          
2011  Federal    Federal               1.380000
      Midwest    East North Central    1.099671
                 West North Central    1.035526
      Northeast  Mid-Atlantic          1.001750
                 New England           1.115214
      South      East South Central    0.805975
                 South Atlantic        0.917843
                 West South Central    0.881235
      West       Mountain              0.860362
                 Pacific               1.448103
2012  Federal    Federal               1.373000
      Midwest    East North Central    1.031675
                 West North Central    1.043673
      Northeast  Mid-Atlantic          0.984457
                 New England           1.088153
      South      East South Central    0.796217
                 South Atlantic        0.914648
                 West South Central    0.859317
      West       Mountain              0.871882
                 Pacific               1.336828
2013  Federal    Federal               1.331037
      Midwest    East North Central    1.143384
                 West North Central    1.044429
      Northeast  Mid-Atlantic          0.987791
                 New England           1.074341
      South      East South Central    0.791955
                 South Atlantic        0.903849
                 West South Central    0.905175
      West       Mountain              0.860498
                 Pacific               1.318337
2014  Federal    Federal               1.280000
      Midwest    East North Central    1.138908
                 West North Central    1.036418
      Northeast  Mid-Atlantic          0.972415
                 New England           1.068524
      South      East South Central    0.768438
                 South Atlantic        0.909722
                 West South Central    0.911157
      West       Mountain              0.864469
                 Pacific               0.950348
2015  Federal    Federal               1.197000
      Midwest    East North Central    1.122013
                 West North Central    1.030593
      Northeast  Mid-Atlantic          0.953354
                 New England           1.014071
      South      East South Central    0.822564
                 South Atlantic        0.921198
                 West South Central    0.899393
      West       Mountain              0.846741
                 Pacific               0.937674
2016  Federal    Federal               1.140430
      Midwest    East North Central    1.048168
                 West North Central    1.034928
      Northeast  Mid-Atlantic          0.946959
                 New England           0.773357
      South      East South Central    0.818840
                 South Atlantic        0.954584
                 West South Central    0.888155
      West       Mountain              0.930806
                 Pacific               0.942105
Name: Occupancy, dtype: float64
In [44]:
def plotRegionalOccupancy(table, usMap, low_or_high):
    
    regionType = table.index.names[1]

    tableWOFederal = table[table.index.get_level_values(1) != "Federal"]

    years = table.index.get_level_values(0).unique()
    dividedTableWOFederal = [tableWOFederal[year] for year in years]

    usRegionMap = usMap[[regionType, "geometry"]].dissolve(regionType)

    colorlimit = (tableWOFederal.min(), tableWOFederal.max())
    for i, oneYearTable in enumerate(dividedTableWOFederal):
        year = years[i]
        
        oneYearTable = usRegionMap.join(oneYearTable)

        figtitle = "Prison Occupancy (" + low_or_high + " Estimate) by " + regionType + ", " + str(year)
        plotGeoPandasUSMap(oneYearTable, figtitle, "Occupancy", colorlimit)


    figtitle = "Prison Occupancy (" + low_or_high + " Estimate) by " + regionType
    ax = table.unstack().plot(title = figtitle)
    ax.tick_params(axis = "y", length=0)
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), fancybox=True, shadow=True, ncol=len(table.index.get_level_values(1).unique()))
In [45]:
plotRegionalOccupancy(regionLowOccupancy, usMap, "Low")
In [46]:
plotRegionalOccupancy(regionHighOccupancy, usMap, "High")
In [47]:
plotRegionalOccupancy(dropRegionIndex(divisionLowOccupancy), usMap, "Low")
In [48]:
plotRegionalOccupancy(dropRegionIndex(divisionHighOccupancy), usMap, "High")
In [49]:
totalAdmission = pd.read_excel("./messy_data/admissions.xlsx", sheet_name="Total").dropna().replace("/", np.nan)
totalAdmission.head()
Out[49]:
Jurisdiction 1978 1979 1980 1981 1982 1983 1984 1985 1986 ... 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016
1 Federal 14724.0 14120.0 12598.0 12830.0 14818.0 16745.0 16013.0 17916.0 18501.0 ... 53618 53662 56153 54121 60634 55938 53664 50865 46912 44682
3 Alabama 2572.0 2597.0 3766.0 4025.0 4425.0 4605.0 4701.0 4370.0 3962.0 ... 10708 11037 13093 11881 11387 11203 11265 10912 10451 10749
4 Alaska 258.0 311.0 459.0 461.0 541.0 711.0 727.0 875.0 1097.0 ... NaN NaN NaN 2650 3789 3906 3906 3846 4271 1804
5 Arizona 1620.0 1641.0 2082.0 2759.0 2910.0 3288.0 3386.0 3989.0 4515.0 ... 14046 14867 14526 13249 13030 12970 13538 14439 14670 13663
6 Arkansas 1958.0 2189.0 2311.0 2419.0 2323.0 2173.0 2179.0 2301.0 2280.0 ... 6651 7017 7383 7603 7059 5782 8987 9435 9351 9911

5 rows × 40 columns

In [50]:
maleAdmission = pd.read_excel("./messy_data/admissions.xlsx", sheet_name="Male").dropna().replace("/", np.nan)
femaleAdmission = pd.read_excel("./messy_data/admissions.xlsx", sheet_name="Female").dropna().replace("/", np.nan)
In [51]:
def cleanAndMeltCountTable(table, states):
    
    # add DC data to Federal, and delete it
    table.loc[table.Jurisdiction == "Federal"].iloc[:, 1:] = table.loc[table.Jurisdiction == "Federal"].iloc[:, 1:] + table.loc[table.Jurisdiction == "District of Columbia"].iloc[:, 1:].replace("--", 0)
    table.drop(table.index[table.Jurisdiction == "District of Columbia"], inplace=True)

    table = table.melt(id_vars = "Jurisdiction", var_name = "Year", value_name = "Count")
    table.Year = table.Year.astype(int)
    
    table = states.merge(table, on="Jurisdiction")
    
    return table
In [53]:
meltedTotalAdmission = cleanAndMeltCountTable(totalAdmission, states)
In [54]:
meltedMaleAdmission = cleanAndMeltCountTable(maleAdmission, states)
meltedFemaleAdmission = cleanAndMeltCountTable(femaleAdmission, states)
In [55]:
meltedTotalAdmission
Out[55]:
Jurisdiction Jurisdiction Abbreviation Year Count
0 Alabama AL 1978 2572
1 Alabama AL 1979 2597
2 Alabama AL 1980 3766
3 Alabama AL 1981 4025
4 Alabama AL 1982 4425
5 Alabama AL 1983 4605
6 Alabama AL 1984 4701
7 Alabama AL 1985 4370
8 Alabama AL 1986 3962
9 Alabama AL 1987 4543
10 Alabama AL 1988 5101
11 Alabama AL 1989 6510
12 Alabama AL 1990 7031
13 Alabama AL 1991 7683
14 Alabama AL 1992 7967
15 Alabama AL 1993 8454
16 Alabama AL 1994 8287
17 Alabama AL 1995 8692
18 Alabama AL 1996 9465
19 Alabama AL 1997 9301
20 Alabama AL 1998 7492
21 Alabama AL 1999 NaN
22 Alabama AL 2000 6296
23 Alabama AL 2001 7428
24 Alabama AL 2002 7033
25 Alabama AL 2003 9524
26 Alabama AL 2004 8278
27 Alabama AL 2005 9723
28 Alabama AL 2006 10039
29 Alabama AL 2007 10708
... ... ... ... ...
1959 Federal FED 1987 18709
1960 Federal FED 1988 18696
1961 Federal FED 1989 23491
1962 Federal FED 1990 NaN
1963 Federal FED 1991 NaN
1964 Federal FED 1992 NaN
1965 Federal FED 1993 25235
1966 Federal FED 1994 27271
1967 Federal FED 1995 27337
1968 Federal FED 1996 30239
1969 Federal FED 1997 33906
1970 Federal FED 1998 38219
1971 Federal FED 1999 41972
1972 Federal FED 2000 43732
1973 Federal FED 2001 45140
1974 Federal FED 2002 48144
1975 Federal FED 2003 52288
1976 Federal FED 2004 52982
1977 Federal FED 2005 56057
1978 Federal FED 2006 57495
1979 Federal FED 2007 53618
1980 Federal FED 2008 53662
1981 Federal FED 2009 56153
1982 Federal FED 2010 54121
1983 Federal FED 2011 60634
1984 Federal FED 2012 55938
1985 Federal FED 2013 53664
1986 Federal FED 2014 50865
1987 Federal FED 2015 46912
1988 Federal FED 2016 44682

1989 rows × 4 columns

In [63]:
totalRelease = pd.read_excel("./messy_data/releases.xlsx", sheet_name="Total").dropna().replace("/", np.nan)
maleRelease = pd.read_excel("./messy_data/releases.xlsx", sheet_name="Male").dropna().replace("/", np.nan)
femaleRelease = pd.read_excel("./messy_data/releases.xlsx", sheet_name="Female").dropna().replace("/", np.nan)
In [64]:
totalRelease
Out[64]:
Jurisdiction 1978 1979 1980 1981 1982 1983 1984 1985 1986 ... 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016
1 Federal 17414.0 18584.0 14788.0 11773.0 13420.0 14484.0 15108.0 13497.0 15203.0 ... 48764 52348 50720 52487 55239 56037 54785 54529 60156 52035
3 Alabama 2726.0 2744.0 3207.0 2908.0 2830.0 3225.0 3861.0 3694.0 3197.0 ... 11079 11556 12231 12070 11052 11253 11488 11585 11446 12711
4 Alaska 235.0 216.0 268.0 271.0 358.0 505.0 501.0 620.0 960.0 ... 3286 3741 3196 3068 3599 3774 3774 3774 4085 2159
5 Arizona 1352.0 1638.0 1469.0 1874.0 2027.0 2243.0 2506.0 3354.0 3647.0 ... 12560 13192 13854 13500 13149 13000 12931 13513 14092 13857
6 Arkansas 1878.0 1872.0 2366.0 2045.0 1724.0 1893.0 1953.0 2168.0 2189.0 ... 6045 6610 6990 6664 7252 6298 6541 8812 9702 10370
7 California 9701.0 13549.0 12346.0 13060.0 16009.0 22140.0 25015.0 30091.0 38247.0 ... 135920 136925 128869 121918 109467 47454 36353 38559 41257 34528
8 Colorado 1274.0 1213.0 1234.0 1542.0 1578.0 1967.0 2153.0 2265.0 2337.0 ... 10604 10616 10858 10558 9367 10919 10220 9869 9958 8934
9 Connecticut 1651.0 2731.0 3035.0 2356.0 2216.0 2280.0 2527.0 2262.0 2272.0 ... 6056 6404 6850 6095 6379 6014 5177 5968 5937 5618
10 Delaware 349.0 356.0 424.0 507.0 449.0 568.0 579.0 572.0 591.0 ... 1905 1617 1697 1681 3600 4012 4251 4222 4261 4041
11 District of Columbia 1555.0 1559.0 1503.0 1483.0 1487.0 1485.0 1475.0 1400.0 1596.0 ... -- -- -- -- -- -- -- -- -- --
12 Florida 7231.0 9978.0 8742.0 9808.0 10133.0 18780.0 11714.0 13767.0 15905.0 ... 28705 37277 37167 32649 34673 33661 32855 32754 32690 31166
13 Georgia 6003.0 4615.0 6155.0 6941.0 7670.0 8747.0 9230.0 8977.0 9232.0 ... 18774 19463 16161 16745 15309 14021 18226 17124 15599 15053
14 Hawaii 106.0 134.0 140.0 104.0 110.0 157.0 164.0 339.0 432.0 ... 1518 1795 1915 1764 1404 1631 1615 1242 1293 1666
15 Idaho 582.0 572.0 614.0 600.0 701.0 716.0 776.0 759.0 770.0 ... 3850 3891 3743 4264 4079 4617 3761 4501 5315 5479
16 Illinois 7084.0 6686.0 8687.0 9709.0 9666.0 8940.0 8373.0 9144.0 10519.0 ... 35737 35780 38034 31055 31155 30108 31370 30055 29650 28615
17 Indiana 2396.0 2131.0 2641.0 3021.0 3584.0 3829.0 3942.0 3721.0 4217.0 ... 17099 18308 19699 19911 18422 18555 17959 17866 16075 14561
18 Iowa 977.0 945.0 894.0 1360.0 1481.0 1687.0 1734.0 1965.0 2024.0 ... 5718 5557 4648 4367 5105 5221 5202 5005 5134 5305
19 Kansas 1518.0 1617.0 1412.0 1540.0 1902.0 1658.0 1549.0 1650.0 1688.0 ... 4966 4655 4721 4553 4671 4795 5133 5554 5924 6394
20 Kentucky 2710.0 2358.0 2970.0 2656.0 3001.0 2865.0 2774.0 2872.0 2815.0 ... 13819 15413 14138 15962 14571 16215 16871 17731 18476 18552
21 Louisiana 1856.0 1848.0 1831.0 2404.0 2319.0 2635.0 3445.0 3619.0 4007.0 ... 14984 14991 14924 17262 16580 17104 17646 17882 17971 16308
22 Maine 505.0 450.0 484.0 366.0 568.0 627.0 481.0 377.0 581.0 ... 1090 720 1141 1176 1049 1108 971 1031 682 647
23 Maryland 4315.0 4407.0 4408.0 3663.0 3251.0 3459.0 3588.0 4068.0 4117.0 ... 10123 10383 10807 9387 9829 10347 9504 9466 10258 9459
24 Massachusetts 1148.0 1151.0 1031.0 1065.0 1295.0 1864.0 1962.0 1991.0 2526.0 ... 2248 2667 2850 2908 2484 2871 2855 2654 2708 2458
25 Michigan 5139.0 5382.0 5154.0 5182.0 5486.0 6472.0 6312.0 3597.0 4463.0 ... 14685 13621 18197 17033 14374 13199 14307 14177 13713 14081
26 Minnesota 1182.0 1223.0 1217.0 1237.0 1400.0 1418.0 1422.0 1436.0 1519.0 ... 7971 7936 7777 7882 7734 7730 7808 7642 7669 8254
27 Mississippi 1202.0 1618.0 1999.0 2090.0 2175.0 2652.0 2354.0 2565.0 2572.0 ... 8455 7817 9285 8694 8197 7725 8201 9442 6104 7080
28 Missouri 2185.0 2524.0 2705.0 2550.0 2497.0 2884.0 3345.0 3399.0 4481.0 ... 19323 18864 18097 17799 17823 17957 18790 18767 17930 18410
29 Montana 344.0 301.0 364.0 425.0 414.0 449.0 438.0 356.0 539.0 ... 2176 2117 2212 2152 2101 2089 2347 2387 2413 2546
30 Nebraska 577.0 596.0 603.0 702.0 590.0 636.0 603.0 513.0 651.0 ... 1952 1963 2107 2123 2391 2688 2583 2284 2317 2366
31 Nevada 627.0 713.0 769.0 856.0 814.0 1024.0 1128.0 1425.0 1445.0 ... 4904 5278 5967 6036 5910 5557 5556 5838 5838 5778
32 New Hampshire 190.0 170.0 199.0 199.0 221.0 231.0 183.0 230.0 296.0 ... 1179 1507 1460 1584 1881 1555 1633 1562 1660 1601
33 New Jersey 3602.0 3563.0 3852.0 3039.0 3403.0 4776.0 4139.0 4868.0 5243.0 ... 14358 13885 12860 12821 11485 10817 10766 10275 10248 9685
34 New Mexico 693.0 805.0 728.0 1065.0 907.0 1057.0 1295.0 1544.0 1600.0 ... 4507 4013 3650 3487 3529 3371 3345 3515 3737 3631
35 New York 7688.0 8342.0 8934.0 8299.0 9624.0 11470.0 11518.0 13057.0 14235.0 ... 27009 27482 25481 25365 24460 24224 23382 22927 21775 22047
36 North Carolina 7311.0 6705.0 6836.0 8335.0 8407.0 9620.0 7626.0 8076.0 8839.0 ... 10074 10615 11495 11539 11878 12327 13829 15264 16353 16677
37 North Dakota 178.0 148.0 146.0 175.0 191.0 236.0 214.0 241.0 308.0 ... 977 1051 1003 1006 1013 1069 1173 1046 1484 1583
38 Ohio 6752.0 7783.0 9104.0 8880.0 8837.0 10254.0 9789.0 8606.0 9813.0 ... 29236 28552 26949 24495 22899 21628 21235 22399 21759 22850
39 Oklahoma 2041.0 2657.0 2137.0 2154.0 2406.0 3060.0 3617.0 3486.0 3286.0 ... 8486 7915 8004 7903 7694 6947 7374 8654 8898 10404
40 Oregon 1963.0 1950.0 2102.0 1928.0 1819.0 2536.0 2470.0 2687.0 3009.0 ... 5080 5055 5422 5290 4567 5023 5048 5432 5084 4712
41 Pennsylvania 3274.0 3266.0 3537.0 3637.0 3803.0 4043.0 4306.0 4612.0 4809.0 ... 16340 15618 14630 16781 17698 18805 19632 20555 20847 20418
42 Rhode Island 230.0 220.0 250.0 249.0 308.0 388.0 404.0 399.0 441.0 ... 884 1086 1246 1113 960 967 885 867 781 939
43 South Carolina 2757.0 3484.0 3610.0 3464.0 3394.0 3443.0 4111.0 4110.0 4327.0 ... 9461 9506 9321 8716 7912 7309 6716 6897 6595 6709
44 South Dakota 329.0 292.0 304.0 343.0 378.0 469.0 492.0 489.0 629.0 ... 3259 3102 3079 2857 2732 1959 1820 2413 2746 2832
45 Tennessee 2439.0 2427.0 2880.0 3319.0 3730.0 3576.0 5431.0 4790.0 3209.0 ... 15537 15414 15762 14735 14961 15955 16348 15556 14488 13508
46 Texas 10488.0 11406.0 11974.0 15111.0 16392.0 23727.0 21991.0 25881.0 31615.0 ... 73023 72168 72320 71497 74544 82130 74093 77277 76189 76733
47 Utah 265.0 357.0 504.0 466.0 619.0 677.0 732.0 776.0 759.0 ... 3393 3400 3498 3109 3206 3063 2988 2979 3346 3611
48 Vermont 269.0 258.0 333.0 332.0 258.0 316.0 324.0 319.0 381.0 ... 2345 2241 1973 2130 2062 1963 1752 1740 1886 1733
49 Virginia 2468.0 3245.0 3063.0 3871.0 5400.0 5281.0 4261.0 4195.0 5032.0 ... 12559 13194 13168 12989 12345 11568 11880 12094 12483 12653
50 Washington 1818.0 1918.0 1927.0 1452.0 1474.0 2108.0 2071.0 2433.0 2721.0 ... 16488 15061 17035 17060 16412 18181 20861 20898 21939 24940
51 West Virginia 438.0 542.0 590.0 515.0 639.0 811.0 866.0 719.0 856.0 ... 2969 3126 2943 3009 3257 3293 3780 3468 3644 3543
52 Wisconsin 1455.0 1494.0 1645.0 1905.0 2019.0 2498.0 2471.0 2151.0 2508.0 ... 8903 9391 8771 8640 7825 7724 5475 5433 5532 5743
53 Wyoming 195.0 215.0 216.0 249.0 223.0 332.0 332.0 353.0 251.0 ... 778 764 824 788 787 878 895 862 900 1041

52 rows × 40 columns

In [65]:
meltedTotalRelease = cleanAndMeltCountTable(totalRelease, states)
meltedMaleRelease = cleanAndMeltCountTable(maleRelease, states)
meltedFemaleRelease = cleanAndMeltCountTable(femaleRelease, states)
In [66]:
meltedTotalRelease
Out[66]:
Jurisdiction Jurisdiction Abbreviation Year Count
0 Alabama AL 1978 2726
1 Alabama AL 1979 2744
2 Alabama AL 1980 3207
3 Alabama AL 1981 2908
4 Alabama AL 1982 2830
5 Alabama AL 1983 3225
6 Alabama AL 1984 3861
7 Alabama AL 1985 3694
8 Alabama AL 1986 3197
9 Alabama AL 1987 3480
10 Alabama AL 1988 5317
11 Alabama AL 1989 5344
12 Alabama AL 1990 5308
13 Alabama AL 1991 6645
14 Alabama AL 1992 7404
15 Alabama AL 1993 7244
16 Alabama AL 1994 7371
17 Alabama AL 1995 7618
18 Alabama AL 1996 8432
19 Alabama AL 1997 8682
20 Alabama AL 1998 7016
21 Alabama AL 1999 8194
22 Alabama AL 2000 7136
23 Alabama AL 2001 7905
24 Alabama AL 2002 7472
25 Alabama AL 2003 10167
26 Alabama AL 2004 9156
27 Alabama AL 2005 10472
28 Alabama AL 2006 11283
29 Alabama AL 2007 11079
... ... ... ... ...
1959 Federal FED 1987 16123
1960 Federal FED 1988 15429
1961 Federal FED 1989 18228
1962 Federal FED 1990 NaN
1963 Federal FED 1991 NaN
1964 Federal FED 1992 NaN
1965 Federal FED 1993 18846
1966 Federal FED 1994 21259
1967 Federal FED 1995 22514
1968 Federal FED 1996 24876
1969 Federal FED 1997 27518
1970 Federal FED 1998 29462
1971 Federal FED 1999 31816
1972 Federal FED 2000 35259
1973 Federal FED 2001 38370
1974 Federal FED 2002 42339
1975 Federal FED 2003 44135
1976 Federal FED 2004 46624
1977 Federal FED 2005 48323
1978 Federal FED 2006 47920
1979 Federal FED 2007 48764
1980 Federal FED 2008 52348
1981 Federal FED 2009 50720
1982 Federal FED 2010 52487
1983 Federal FED 2011 55239
1984 Federal FED 2012 56037
1985 Federal FED 2013 54785
1986 Federal FED 2014 54529
1987 Federal FED 2015 60156
1988 Federal FED 2016 52035

1989 rows × 4 columns

In [ ]: